/*
Code to create match rates to w-2 files and scale-up factor
*/

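/*
Use this code to get the match rates of loans to W-2 records.
NOTE(review): this comment originally said "match rates to 1040 ownership";
nothing in the visible code computes a 1040 ownership match -- confirm whether
that description is stale.
*/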
// Load project globals (defines the directory macros used below, e.g. datadir/outdir).
do "${dodir}/make_globals.do"

// Start from the distributional data set.
use "${datadir}/distributional_data", clear

// Where match_type is 8, overwrite match_type_first with match_type.
// NOTE(review): the meaning of code 8 is not documented in this file -- confirm.
replace match_type_first = match_type if match_type == 8

//###########

// Dummy variable for matching to W-2s: 1 when totcount is present, 0 otherwise.
// missing() is used instead of "!= ." so that extended missing values
// (.a-.z) are not counted as matched.
gen matched = !missing(totcount)

// Clean up the business-type variable for use in scale-ups.
// cond_bus_type starts as a copy of businesstype; each raw label listed below is
// then mapped to a coarser group. Labels that are not listed keep their raw value.
// Every condition tests the raw businesstype, so the order of the replaces is irrelevant.
rename businesstype_first businesstype
gen cond_bus_type = businesstype

// Non-profits. The 501(c) labels contain a non-ASCII dash that is garbled in this
// file, so they are matched on their ASCII prefix "501(c)" rather than on the full
// literal; this keeps the mapping independent of the file/data encoding.
// NOTE(review): this assumes every label starting with "501(c)" is a non-profit -- confirm.
replace cond_bus_type = "non-profit" if substr(businesstype, 1, 6) == "501(c)" ///
	| inlist(businesstype, "Non-Profit Childcare Center", "Non-Profit Organization", "Tribal Concerns")

// Sole proprietors and similar single-owner labels.
replace cond_bus_type = "sole" if inlist(businesstype, ///
	"Independent Contractors", ///
	"Qualified Joint-Venture (spouses)", ///
	"Rollover as Business Start-Ups (ROB", ///
	"Self-Employed Individuals", ///
	"Single Member LLC", ///
	"Sole Proprietorship")

// Partnerships and cooperative-style labels.
replace cond_bus_type = "partnership" if inlist(businesstype, ///
	"Cooperative", ///
	"Housing Co-op", ///
	"Joint Venture", ///
	"Limited Liability Partnership", ///
	"Partnership")

// Corporations.
replace cond_bus_type = "c_corp" if businesstype == "Corporation"
replace cond_bus_type = "s_corp" if businesstype == "Subchapter S Corporation"

// ESOPs, tenants in common and trusts are grouped under "ownership".
replace cond_bus_type = "ownership" if inlist(businesstype, ///
	"Employee Stock Ownership Plan(ESOP)", ///
	"Tenant in Common", ///
	"Trust")

replace cond_bus_type = "prof_assoc" if businesstype == "Professional Association"

// Blank business type.
replace cond_bus_type = "other" if businesstype == ""

// The double space after "Limited" is part of the label being matched; do not normalise it.
replace cond_bus_type = "llc" if businesstype == "Limited  Liability Company(LLC)"

// Collapse to totals by match status and business type, export them, and build
// the inverse-match-rate ("scale-up") weights saved to scale_ups/w2.
preserve

// One row per (matched, cond_bus_type): loan count, total forgiveness amount, total jobs reported.
gcollapse (count) loannumber_first (sum) forgivenessamount_first jobsreported_first, by(matched cond_bus_type)
export excel "${outdir}/sum_stats/scale_up.xlsx", firstrow(variables) sheet("w2", replace)

// Reshape to make division easier: one row per business type, with the suffix
// 0 = unmatched and 1 = matched. The stubs are spelled out in full so the reshape
// does not rely on variable-name abbreviation (e.g. under "set varabbrev off").
reshape wide loannumber_first forgivenessamount_first jobsreported_first, i(cond_bus_type) j(matched)

// A business type with no unmatched cell gets missing *_first0 values after the
// reshape; treat those as zero so the weight comes out as 1 rather than missing.
foreach stub in loannumber_first forgivenessamount_first jobsreported_first {
	replace `stub'0 = 0 if missing(`stub'0)
}

// Inverse match rates: (matched + unmatched) / matched.
// These stay missing when a business type has no matched cell (or a zero matched total).
gen loan_w2_wt = (loannumber_first1 + loannumber_first0) / loannumber_first1
gen dollar_w2_wt = (forgivenessamount_first1 + forgivenessamount_first0) / forgivenessamount_first1
gen job_w2_wt = (jobsreported_first1 + jobsreported_first0) / jobsreported_first1

// Keep only the business type and the three weights.
keep cond_bus_type *wt

save "${datadir}/scale_ups/w2", replace

restore

// Spreadsheet of the match rate to W-2 for loans matched to an EIN.
// NOTE(review): the 2e+9 cutoff presumably marks EIN-based ids -- confirm against
// the code that constructs id.
preserve

// Stata treats numeric missing as larger than any number, so "id >= 2e+9" alone
// would also keep observations with a missing id; exclude them explicitly.
gcollapse (count) loannumber_first (sum) forgivenessamount_first jobsreported_first if id >= 2e+9 & !missing(id), by(matched)
export excel "${outdir}/sum_stats/ein_w2_match.xlsx", firstrow(variables) replace

restore


// Now compare the distributions of PPP and non-PPP firms.
// took_ppp = 1 when a forgiveness amount is present, 0 otherwise. It is created
// before -preserve- so it remains in the data after -restore-.
// missing() is used instead of "!= ." so extended missing values are not counted as PPP.
gen took_ppp = !missing(forgivenessamount_first)
preserve

// Totals of totcount and of every w_c* variable, by PPP status.
gcollapse (sum) totcount w_c* , by(took_ppp)

// Each w_c* total as a share of total totcount.
// NOTE(review): w_c20 ... w_c100 presumably hold counts by wage cutoff -- confirm.
foreach var in c20 c40 c60 c80 c90 c95 c99 c100 {
	gen pct_`var' = w_`var' / totcount
}

// replace lets the do-file be re-run when the workbook already exists.
export excel "${outdir}/sum_stats/wage_dist_comp.xlsx", firstrow(variables) replace
restore

// Now do a firm size comparison: means of totcount and of every w_c* variable,
// by PPP status (took_ppp must already exist in the data at this point).
preserve

gcollapse (mean) totcount w_c* , by(took_ppp)

// replace lets the do-file be re-run when the workbook already exists.
export excel "${outdir}/sum_stats/wage_size_comp.xlsx", firstrow(variables) replace
restore
